---
title: "CHC Analysis"
output: html_document
---

```{r setup, include=FALSE}
library(DESeq2)
library("magrittr")
library("dplyr")
library("ggplot2")
library("reshape2")
library("RColorBrewer")
library("gplots")

#Read in read count matrix and sample information
matrix = read.table("adult2.counts_adjusted.txt")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


#Remove loci with less than 10 counts over all samples
adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]
matrix = as.matrix(adults_new)
matrix = round(matrix)


#Read in list of putative CHC and OD genes
to_test = read.table("new_trans_alltotest.csv", header=FALSE)



#Read in annotation information
annotation = read.csv("new_trinotate_annotation_report.csv", header=TRUE, sep=",")


```

#run DESEQ for all adults for all loci
```{r}
dds <- DESeqDataSetFromMatrix(countData = matrix,
                              colData = samples,
                              design = ~ Sex + Genotype + Sex*Genotype)
dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)
```


#separate out genotype and sex significant DE genes alpha = 0.05 and logfold min > 2
```{r}

#Loci by genotype effect
res_geno <- results(dds, name="Genotype_BB_vs_AA", alpha=0.05) %>% as.data.frame
res_geno = na.omit(res_geno)

#subset to putative OD and CHC genes then run FDR 
res_gen2 = subset(res_geno, rownames(res_geno) %in% to_test$V1)
res_gen2$test = p.adjust(res_gen2$pvalue, method = "BH", n = length(res_gen2$pvalue))
resx_geno <- res_gen2 %>% subset(test < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame

#Loci by sex effect
res_sex <- results(dds, name="Sex_Male_vs_Female", alpha=0.05)  %>%  as.data.frame
res_sex = na.omit(res_sex)

#subset to putative OD and CHC genes then run FDR 
res_sex2 = subset(res_sex, rownames(res_sex) %in% to_test$V1)
res_sex2$test = p.adjust(res_sex2$pvalue, method = "BH", n = length(res_sex2$pvalue))
resx_sex <- res_sex2 %>% subset(test < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame


```


#get annotation info and write
```{r}
resx_sex$transcript_id = rownames(resx_sex)
sex = merge(resx_sex, annotation, by=c("transcript_id"))
write.table(sex, file="sex_trans.csv", sep=",")


resx_geno$transcript_id = rownames(resx_geno)
geno = merge(resx_geno, annotation, by=c("transcript_id"))
write.table(geno, file="geno_trans.csv", sep=",")

```

#Do just males
```{r}

#remake the DESeq data
matrix = read.table("adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]



matrix = as.matrix(adults_new)
matrix2 = round(matrix)



dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype)


#keep only males
dds <- dds[,c(2,5,7,10,12,13,14,16)]

#run DESeq for all loci
dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)

```

#separate out genotype, sex, and interaction significant DE genes alpha = 0.05 and logfold > 2
```{r}

res_Mgeno <- results(dds, name="Genotype_BB_vs_AA", alpha=0.05)
res_Mgeno = na.omit(res_Mgeno)

#subset to putative OD and CHC genes then run FDR 
res_Mgen2 = subset(res_Mgeno, rownames(res_Mgeno) %in% to_test$V1)
res_Mgen2$test = p.adjust(res_Mgen2$pvalue, method = "BH", n = length(res_Mgen2$pvalue))
resx_Mgeno <- res_Mgen2 %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame


```


#Do just females
```{r}
#remake the DESeq data

matrix = read.table("/Users/emmaberdan/Documents/RNA study/New Transcriptome/RSEM/adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good
pos = read.table("/Users/emmaberdan/Documents/RNA study/positions.csv", sep=",", header=TRUE)

adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new2)
matrix2 = round(matrix)



dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype)


#keep only females
dds <- dds[,c(1,3,4,6,8,9,11,15,17)]

#run DESeq for all loci
dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)

```

#separate out genotype alpha = 0.05 and logfold > 2
```{r}

res_Fgeno <- results(dds, name="Genotype_BB_vs_AA", alpha=0.05)
res_Fgeno = na.omit(res_Fgeno)

#subset to putative OD and CHC genes then run FDR 
res_Fgen2 = subset(res_Fgeno, rownames(res_Fgeno) %in% to_test$V1)
res_Fgen2$test = p.adjust(res_Fgen2$pvalue, method = "BH", n = length(res_Fgen2$pvalue))
resx_Fgeno <- res_Fgen2 %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame


```


#get annotation info and write
```{r}


resx_Mgeno$transcript_id = rownames(resx_Mgeno)
Mgeno = merge(resx_Mgeno, annotation, by=c("transcript_id"))
write.table(Mgeno, file="male_geno_trans.csv", sep=",")


resx_Fgeno$transcript_id = rownames(resx_Fgeno)
Fgeno = merge(resx_Fgeno, annotation, by=c("transcript_id"))
write.table(Fgeno, file="female_geno_trans.csv", sep=",")
```



